In this analysis I used real data from Brazil Stock Market.

# UCM
# Prof. Lorenzo Escot
# Alumno: Caio Fernandes Moreno (caiofern@ucm.es)
# Brazil Stock Market Analysis




setwd("~/git/Bitbucket/ucm/SCORE/tareas/Lorenzo-Escot")

library(tseries) # adf.test, kpss.test, bds.test, get.hist.quote, portfolio.optim, surrogate, arma, garch
#install.packages("forecast")
library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: timeDate
## This is forecast 6.2
# En el paquete forecast tiene un modelo auto ARIMA.
#install.packages("fArma")
library(fArma) #ARMAFIT, RSFIT
## Loading required package: timeSeries
## 
## Attaching package: 'timeSeries'
## 
## The following object is masked from 'package:zoo':
## 
##     time<-
## 
## Loading required package: fBasics
## 
## 
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
#install.packages("fGarch")
library(fGarch) #GARCHFIT formula ~arma (2,1) + garch (1,1) # ~ AltGr + 4
#install.packages("outliers")
library(outliers) #: outlier, rm.outlier, scores, chisq.out.test # para detectar outliers o datos an?malos ojo serie estacionaria
## 
## Attaching package: 'outliers'
## 
## The following object is masked from 'package:timeSeries':
## 
##     outlier
#install.packages("zoo")
library(zoo)
#setinternet2() #esto abre el puerto de internet



#stock.name <- "^BVSP"
#stock.description <- "IBOVESPA"



generateAnalysis <- function(x, y) {
  

stock.name <- x 
stock.description <- y  
  
  
## lectura de los datos hist?ricos del ^BVSP
stock.name <- get.hist.quote(instrument=stock.name, quote="AdjClose")

# BVSP time series starts 1993-04-27
# http://finance.yahoo.com/q?s=%5EBVSP

series.name <- stock.name

str(series.name)
summary(series.name)
start(series.name)
end(series.name)
plot(series.name)

head(series.name, 10)

tail(series.name, 10)

# Mirando los datos he decidido con la ayuda del Prof. Lorenzo no quitar los datos de 1993 hasta 1998.

#?existen datos nulos?
length(series.name)
length(series.name[!is.na(series.name)])
length(complete.cases(series.name))
#parece que no, pero si tuviera na se podr?an quitar con: ibex<-ibex[complete.cases(ibex)]

series.name<-series.name[complete.cases(series.name)]
plot(series.name)


### podemos seleccionar una submuestra: Temporal
series.name.short <-window(series.name,start=as.Date("1993-04-27"),end=as.Date("2015-09-30"))
str(series.name.short)
summary(series.name.short)
plot(series.name.short)


## Calculo la serie de rendimientos
d.series.name <- diff(log(series.name.short))

# Concatenate stock.description with the text Withall
plot.description <- paste(stock.description, "WITH ALL DATA", collapse = ", ")

#Grafico de la serie 
plot(d.series.name, main=(plot.description))

     

     
#Datos an?malos
# type = z Busca los datos tipificados mayor que 5 vezes la sd (disviacion tipica)

# Remove datos anomalos
remove.outlier.d.series.name <- d.series.name[abs(scores(d.series.name, type="z"))<=5]

#plot(remove.outlier.d.series.name, main="IBOVESPA WITHOUT OUTLIERS")

# Concatenate stock.description with the text Withall
plot.description <- paste(stock.description, " WITHOUT OUTLIERS", collapse = ", ")

#Grafico de la serie 
plot(d.series.name, main=(plot.description))


#?es estacionario?
adf.test(d.series.name)# Ho: una ra?z unitaria (no estacionaria)

# Augmented Dickey-Fuller Test
# data:  dBVSP
# Dickey-Fuller = -14.073, Lag order = 17, p-value = 0.01
# alternative hypothesis: stationary

# No es estacionaria

sd(d.series.name) #desviaci?n t?pica

# Statistical stationarity:
# http://people.duke.edu/~rnau/411diff.htm


#?presenta correlaci?n?

df.d.series.name <- as.data.frame(d.series.name)

#periodograma
par(mfrow=c(2,1))
acf(df.d.series.name, ylim=c(-1,1)) 
pacf(df.d.series.name, ylim=c(-1,1))


tsdisplay(df.d.series.name)


# test bds
bds.test(d.series.name,m=10) # H0: i.i.d

#test R/, exponente de Hurst
HURST<-rsFit(d.series.name, doplot = T)# Exponente de Hurst 0.5 ruido blanco
HURST

##Se puede hacer el test de Hurst=0.5 con el siguiente estad?stico t ## 

t<-(HURST@hurst$diag[2,1]-0.5)/HURST@hurst$diag[2,2]
t

#Modelo Auto Arima

modelo.auto.arima <- auto.arima(d.series.name)
plot(forecast(modelo.auto.arima,h=20))


modelo.auto.arima1 <- auto.arima(d.series.name)
plot(forecast(modelo.auto.arima1, h=1))


# alternativa
d.series.name.ARMA<-armaFit(~ arma(1,3), data=d.series.name)
summary(d.series.name.ARMA, wich="all")
residuo<-residuals(d.series.name.ARMA)


plot(residuo)
lines(residuo)

df.residuo <- as.data.frame(residuo)

#periodograma
par(mfrow=c(2,1))
acf(df.residuo, ylim=c(-1,1)) 
pacf(df.residuo, ylim=c(-1,1))

#x11()
tsdisplay(df.residuo)


# test bds
bds.test(d.series.name,m=10) # H0: i.i.d

#test R/, exponente de Hurst
HURST<-rsFit(d.series.name, doplot = T)# Exponente de Hurst 0.5 ruido blanco
HURST


# Este codigo ha tenido muchas modificaciones hay que cojer el codigo del profesor Lorenzo.

##Se puede hacer el test de Hurst=0.5 con el siguiente estad?stico t ## 

t<-(HURST@hurst$diag[2,1]-0.5)/HURST@hurst$diag[2,2]
t


}



# IBOVESPA
stock.name <- "^BVSP"
stock.description <- "IBOVESPA"
# Call your function and pass x and y to the function
run <- generateAnalysis(stock.name,stock.description)
## time series starts 1993-04-27
## 'zoo' series from 1993-04-27 to 2015-11-16
##   Data: num [1:5603, 1] 24.5 24.3 23.7 24.1 24.1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "AdjClose"
##   Index:  Date[1:5603], format: "1993-04-27" "1993-04-28" "1993-04-29" "1993-04-30" ...

## 'zoo' series from 1993-04-27 to 2015-09-30
##   Data: num [1:5572] 24.5 24.3 23.7 24.1 24.1 ...
##   Index:  Date[1:5572], format: "1993-04-27" "1993-04-28" "1993-04-29" "1993-04-30" ...

## Warning in adf.test(d.series.name): p-value smaller than printed p-value

## Warning in sqrt(diag(fit$var.coef)): NaNs produced

## 
## Title:
##  ARIMA Modelling 
## 
## Call:
##  armaFit(formula = ~arma(1, 3), data = d.series.name)
## 
## Model:
##  ARIMA(1,0,3) with method: CSS-ML
## 
## Coefficient(s):
##       ar1        ma1        ma2        ma3  intercept  
##  0.028451   0.027562  -0.006066  -0.012289   0.001349  
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1724656 -0.0116332 -0.0002197  0.0116098  0.2916280 
## 
## Moments: 
## Skewness Kurtosis 
##   0.5518  10.4767 
## 
## Coefficient(s):
##             Estimate  Std. Error  t value Pr(>|t|)    
## ar1        0.0284509          NA       NA       NA    
## ma1        0.0275620          NA       NA       NA    
## ma2       -0.0060665          NA       NA       NA    
## ma3       -0.0122894   0.0034857   -3.526 0.000422 ***
## intercept  0.0013495   0.0003251    4.151 3.31e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## sigma^2 estimated as: 0.0005446
## log likelihood:       13029.24
## AIC Criterion:        -26046.48

## 
## Description:
##  Tue Nov 17 20:32:17 2015 by user:

# Itau
stock.name <- "ITSA4.SA"
stock.description <- "Itausa - Investimentos Itau S.A"
# Call your function and pass x and y to the function
run <- generateAnalysis(stock.name,stock.description)
## time series starts 2000-01-03
## 'zoo' series from 2000-01-03 to 2015-11-16
##   Data: num [1:3952, 1] 1.18 1.07 1.15 1.17 1.17 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "AdjClose"
##   Index:  Date[1:3952], format: "2000-01-03" "2000-01-04" "2000-01-05" "2000-01-06" ...

## 'zoo' series from 2000-01-03 to 2015-09-30
##   Data: num [1:3919] 1.18 1.07 1.15 1.17 1.17 ...
##   Index:  Date[1:3919], format: "2000-01-03" "2000-01-04" "2000-01-05" "2000-01-06" ...

## Warning in adf.test(d.series.name): p-value smaller than printed p-value

## Warning in sqrt(diag(fit$var.coef)): NaNs produced

## 
## Title:
##  ARIMA Modelling 
## 
## Call:
##  armaFit(formula = ~arma(1, 3), data = d.series.name)
## 
## Model:
##  ARIMA(1,0,3) with method: CSS-ML
## 
## Coefficient(s):
##        ar1         ma1         ma2         ma3   intercept  
## -0.4637719   0.4593908  -0.0346233  -0.0623858   0.0004736  
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1513665 -0.0126110 -0.0003865  0.0120425  0.2207712 
## 
## Moments: 
## Skewness Kurtosis 
##    0.159    5.837 
## 
## Coefficient(s):
##             Estimate  Std. Error  t value Pr(>|t|)    
## ar1       -0.4637719          NA       NA       NA    
## ma1        0.4593908          NA       NA       NA    
## ma2       -0.0346233   0.0181061   -1.912 0.055845 .  
## ma3       -0.0623858   0.0161051   -3.874 0.000107 ***
## intercept  0.0004736   0.0003491    1.357 0.174921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## sigma^2 estimated as: 0.0005501
## log likelihood:       9143.71
## AIC Criterion:        -18275.42

## 
## Description:
##  Tue Nov 17 20:32:24 2015 by user:

# BBAS3.SA
stock.name <- "BBAS3.SA"
stock.description <- "Banco do Brasil S.A."
# Call your function and pass x and y to the function
run <- generateAnalysis(stock.name,stock.description)
## time series starts 2000-01-03
## 'zoo' series from 2000-01-03 to 2015-11-16
##   Data: num [1:4097, 1] 1.89 1.8 1.82 1.85 1.8 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "AdjClose"
##   Index:  Date[1:4097], format: "2000-01-03" "2000-01-04" "2000-01-05" "2000-01-06" ...

## 'zoo' series from 2000-01-03 to 2015-09-30
##   Data: num [1:4064] 1.89 1.8 1.82 1.85 1.8 ...
##   Index:  Date[1:4064], format: "2000-01-03" "2000-01-04" "2000-01-05" "2000-01-06" ...

## Warning in adf.test(d.series.name): p-value smaller than printed p-value

## Warning in sqrt(diag(fit$var.coef)): NaNs produced

## 
## Title:
##  ARIMA Modelling 
## 
## Call:
##  armaFit(formula = ~arma(1, 3), data = d.series.name)
## 
## Model:
##  ARIMA(1,0,3) with method: CSS-ML
## 
## Coefficient(s):
##        ar1         ma1         ma2         ma3   intercept  
## -0.0060659  -0.0058871  -0.0409504  -0.0208741   0.0005117  
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1642276 -0.0152146 -0.0004861  0.0143946  0.2561532 
## 
## Moments: 
## Skewness Kurtosis 
##   0.4049   5.0957 
## 
## Coefficient(s):
##             Estimate  Std. Error  t value Pr(>|t|)    
## ar1       -0.0060659          NA       NA       NA    
## ma1       -0.0058871          NA       NA       NA    
## ma2       -0.0409504   0.0112869   -3.628 0.000285 ***
## ma3       -0.0208741          NA       NA       NA    
## intercept  0.0005117   0.0003929    1.302 0.192873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## sigma^2 estimated as: 0.0007293
## log likelihood:       8909.2
## AIC Criterion:        -17806.39

## 
## Description:
##  Tue Nov 17 20:32:30 2015 by user:

# KROT3.SA
stock.name <- "KROT3.SA"
stock.description <- "Kroton Educacional S.A."
# Call your function and pass x and y to the function
run <- generateAnalysis(stock.name,stock.description)
## time series starts 2012-03-14
## 'zoo' series from 2012-03-14 to 2015-11-16
##   Data: num [1:957, 1] 2.21 2.21 2.21 2.21 2.21 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "AdjClose"
##   Index:  Date[1:957], format: "2012-03-14" "2012-03-15" "2012-03-16" "2012-03-19" ...

## 'zoo' series from 2012-03-14 to 2015-09-30
##   Data: num [1:924] 2.21 2.21 2.21 2.21 2.21 ...
##   Index:  Date[1:924], format: "2012-03-14" "2012-03-15" "2012-03-16" "2012-03-19" ...

## Warning in adf.test(d.series.name): p-value smaller than printed p-value

## Warning in lsfit(log10(M), log10(RS), wt): 29 missing values deleted

## 
## Title:
##  ARIMA Modelling 
## 
## Call:
##  armaFit(formula = ~arma(1, 3), data = d.series.name)
## 
## Model:
##  ARIMA(1,0,3) with method: CSS-ML
## 
## Coefficient(s):
##       ar1        ma1        ma2        ma3  intercept  
##  0.426797  -0.560552   0.053052  -0.238926   0.001384  
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.391973 -0.011893 -0.001244  0.015092  1.275535 
## 
## Moments: 
## Skewness Kurtosis 
##   -1.295  120.049 
## 
## Coefficient(s):
##            Estimate  Std. Error  t value Pr(>|t|)    
## ar1        0.426797    0.076118    5.607 2.06e-08 ***
## ma1       -0.560552    0.074747   -7.499 6.42e-14 ***
## ma2        0.053052    0.043406    1.222    0.222    
## ma3       -0.238926    0.031674   -7.543 4.57e-14 ***
## intercept  0.001384    0.001440    0.961    0.336    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## sigma^2 estimated as: 0.00969
## log likelihood:       829.97
## AIC Criterion:        -1647.94

## 
## Description:
##  Tue Nov 17 20:32:33 2015 by user:

## Warning in lsfit(log10(M), log10(RS), wt): 29 missing values deleted

# VALE5.SA
stock.name <- "VALE5.SA"
stock.description <- "Vale S.A."
# Call your function and pass x and y to the function
run <- generateAnalysis(stock.name,stock.description)
## time series starts 2003-01-01
## 'zoo' series from 2003-01-01 to 2015-11-16
##   Data: num [1:3191, 1] 6.07 6.07 6 5.84 5.71 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "AdjClose"
##   Index:  Date[1:3191], format: "2003-01-01" "2003-01-02" "2003-01-03" "2003-01-06" ...

## 'zoo' series from 2003-01-01 to 2015-09-30
##   Data: num [1:3158] 6.07 6.07 6 5.84 5.71 ...
##   Index:  Date[1:3158], format: "2003-01-01" "2003-01-02" "2003-01-03" "2003-01-06" ...

## Warning in adf.test(d.series.name): p-value smaller than printed p-value

## 
## Title:
##  ARIMA Modelling 
## 
## Call:
##  armaFit(formula = ~arma(1, 3), data = d.series.name)
## 
## Model:
##  ARIMA(1,0,3) with method: CSS-ML
## 
## Coefficient(s):
##        ar1         ma1         ma2         ma3   intercept  
## -0.1147017   0.1352835  -0.0466870  -0.0628895   0.0002432  
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1591899 -0.0125681  0.0002449  0.0123261  0.1234791 
## 
## Moments: 
## Skewness Kurtosis 
##  -0.0741   3.2827 
## 
## Coefficient(s):
##             Estimate  Std. Error  t value Pr(>|t|)   
## ar1       -0.1147017   0.2806508   -0.409  0.68276   
## ma1        0.1352835   0.2799768    0.483  0.62896   
## ma2       -0.0466870   0.0185914   -2.511  0.01203 * 
## ma3       -0.0628895   0.0207959   -3.024  0.00249 **
## intercept  0.0002432   0.0003799    0.640  0.52203   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## sigma^2 estimated as: 0.0005368
## log likelihood:       7406.32
## AIC Criterion:        -14800.64

## 
## Description:
##  Tue Nov 17 20:32:36 2015 by user: